/* Start from an empty Stata session. */
clear all
/* Move to the project root, then to the working folder; every relative
   file name used below (temp.dta, final_3y.dta, ...) is resolved there. */
cd "C:\Users\Public\Documents\ImmPanelRevis19\"
cd "ImmPanelRevis19\Donnees\select"

/*****************************/
/* WAGES WAGES WAGES 3 YEARS */
/*****************************/
capture program drop wages3y
program define wages3y
/* wages3y, occp(name)

   Regressions of the change in residual log wages on the immigration
   shock for one occupation group, using final_3y.dta.

   occp : name of a 0/1 indicator variable in final_3y.dta selecting the
          occupation group.

   Side effects: overwrites temp.dta, iw2.dta and temp1.dta in the working
   directory and stores two estimation results, bs<occp>ols and bs<occp>iv.
   Requires the user-written command ivreg2 and data that are already xtset
   (the L. and D. operators are used directly after loading). */
version 11
syntax, occp(name) 

/********************/
/* Balanced Sample  */
/********************/
use final_3y.dta, clear

/* keep period-1 observations in the group and period-2 observations of
   individuals who were in the group one period earlier */
keep if (btime==1 & `occp') | (btime==2 & L.`occp')

/* log of snPT/dpP */
gen lsnd=ln(snPT/dpP)
/* residual wages: residuals from a regression on the F_* dummies */
quietly: reg lsnd F_* 
predict rlsnd, residuals
/* change in residual wages */
gen drlsnd=D.rlsnd

/* ldp and oshift are not used by the regressions below; they are only
   carried into temp.dta / temp1.dta */
gen ldp=ln(dp/l.dp)
gen oshift=(`occp'==0 & L.`occp'==1)
keep if btime==2 & L.`occp' & !missing(dimm) & !missing(dimm75)

/* trim (not winsorize): within each year, values of drlsnd outside the
   1st-99th percentile range are set to missing and dropped below.
   A year with no observations is skipped; previously the empty r(p99)
   macro turned the replace into a syntax error. */
tempname lo hi
foreach year in 1982 1991 1999 2007 {
    quietly: summarize drlsnd if an==`year', detail
    if r(N)>0 {
        scalar `lo'=r(p1)
        scalar `hi'=r(p99)
        replace drlsnd=. if an==`year' & (drlsnd>`hi' | drlsnd<`lo')
    }
}
drop if missing(drlsnd)
drop if missing(basman)
drop if missing(bascom)

/* weight = inverse of the number of individuals in the (ze, an) cell.
   iw2 is merged back and saved in temp1.dta; it is not used as a weight
   in the regressions of this program. */
save temp.dta, replace
gen dummy=1
collapse (sum) nbze = dummy , by(ze an)
gen iw2=1/ nbze
drop nbze

save iw2.dta, replace
/* clear is the documented option of use */
use temp.dta, clear
capture drop _merge
joinby ze an using iw2.dta , unm(m)
drop _merge

save temp1.dta, replace
/* one observation per (ze, an): cell means */
collapse drlsnd dimm dimm75 Y_*, by(ze an)

/* OLS */
ivreg2 drlsnd dimm Y_*  , partial(Y_* ) cl(ze)
estimates store bs`occp'ols

/* IV: dimm instrumented by dimm75 */
ivreg2 drlsnd (dimm  = dimm75 ) Y_* , partial(Y_* ) cl(ze)
estimates store bs`occp'iv

end

/* Run the 3-year wage regressions once per occupation group */
foreach grp in all cad tecemp2 bcq bcnq {
    wages3y, occp(`grp')
}

/* Remove any previous copy of the table (no error if it does not exist),
   because every esttab call below appends to the same file */
capture erase "C:\Users\Public\Documents\Tab21\Table12_Median.rtf"
/* IV Balanced */
esttab bsalliv bscadiv bstecemp2iv bsbcqiv bsbcnqiv ///
    using "C:\Users\Public\Documents\Tab21\Table12_Median.rtf", ///
    append title("bs") b(%9.3f) cells(b(star fmt(3)) se(fmt(3) par) ) ///
    stats(N widstat) star(* 0.10 ** 0.05 *** 0.01)

/****************/
/* MEDIAN WAGES */
/****************/
clear all
cd "C:\Users\Public\Documents\ImmPanelRevis19\ImmPanelRevis19\Donnees\select"

/* Individual-level file, minus the individuals flagged nydads==1 */
use "C:\Users\Public\Documents\ImmPanelRevis19\ImmPanelRevis19\Donnees\indiv.dta", clear
joinby nninouv using "C:\Users\Public\Documents\ImmPanelRevis19\ImmPanelRevis19\Donnees\nydads.dta", unm(m)
drop if nydads==1
drop _merge

/***************************************************/
/* Construct the table final.dta for the estimates */
/***************************************************/
/***********************************/
/* Definition of geographic zones  */
/* 1) => merge with the ZE         */
/***********************************/
/* drop Corsica */
drop if inlist(dep,"2A","2B")

/* municipality code = department code + municipality code within it */
gen cgeo=dep+comt
sort cgeo
/* Paris: the whole department gets the single code 75056 */
replace cgeo="75056" if dep=="75"
/* Lyon: codes 69381-69389 are folded into 69123 */
forvalues arr=381/389 {
    replace cgeo="69123" if cgeo=="69`arr'"
}
/* Marseille: codes 13201-13216 are folded into 13055 */
forvalues arr=201/216 {
    replace cgeo="13055" if cgeo=="13`arr'"
}

/* attach the ZE codes; observations with no match in cgeo_ze.dta are dropped */
joinby cgeo using "C:\Users\Public\Documents\ImmPanelRevis19\ImmPanelRevis19\Donnees\cgeo_ze.dta", unm(m)
tab _merge
drop if _merge==1
drop _merge
drop zen

/* foreign-born workers (depnai=="99") are flagged, labelled, then dropped */
gen fborn=(depnai=="99")
gen ifborn="N" if fborn==0
replace ifborn="FB" if fborn==1
drop if fborn

/*********************/
/* add task contents */
/*********************/
joinby cs2h using "C:\Users\Public\Documents\ImmPanelRevis19\ImmPanelRevis19\Donnees\Onet\CS2_OnetNorm.dta", unm(m)
tab _merge
drop _merge

save temp.dta, replace

/***************************/
/* Add contemporary groups */
/***************************/
/* the group file is keyed on gref; rename it to an so that it can be
   joined on (nninouv, an) */
use "C:\Users\Public\Documents\ImmPanelRevis19\ImmPanelRevis19\Donnees\ident_groupmw.dta", clear
rename gref an
save gref.dta, replace

use temp.dta, clear
joinby nninouv an using gref.dta, unm(m)
tab _merge
drop _merge

/* declare the panel: time is the rank of the year an */
egen time=group(an)
xtset indiv time
compress

save temp.dta, replace

/* group of interest: individuals observed aged 25-50 in some year other
   than 2007; one row per such individual, flagged treat=1 */
keep if inrange(age,25,50) & an!=2007
keep nninouv
bysort nninouv: keep if _n==1
gen treat=1
save treat.dta, replace

/* only keep 'treated' individuals */
use temp.dta, clear
joinby nninouv using treat.dta, unm(m)
keep if !missing(treat)
drop treat

/* valid first-year observation: aged 25-50 in that year */
gen ftreat=inrange(age,25,50)

save temp2.dta, replace

/* Balanced two-period panels.
   Panel A: 1976-1982, B: 1982-1991, C: 1991-1999, D: 1999-2007.
   For each panel keep the first-year observation of individuals with
   ftreat==1 who are also observed in the next period, together with
   their second-year observation. The four panels were previously four
   copy-pasted stanzas; they are now produced by one loop. */
local starts 1976 1982 1991 1999
local stops  1982 1991 1999 2007
local tags   A B C D
forvalues p=1/4 {
    local y1  : word `p' of `starts'
    local y2  : word `p' of `stops'
    local tag : word `p' of `tags'

    use temp2.dta, clear
    /* temp2.dta was saved after a joinby; put the data back in panel
       order (xtset indiv time) before using F. and L. */
    sort indiv time
    keep if an==`y1' | an==`y2'
    /* NOTE(review): F./L. move one step along `time' (rank of an); this
       equals "the other year of the panel" only if the two years are
       adjacent values of an in the data - confirm */
    keep if (ftreat==1 & an==`y1' & !missing(F.an)) | (L.ftreat==1 & an==`y2')
    tab an
    /* panel-specific individual id, so that the same person in two
       panels is treated as two units */
    gen nind="`tag'"+nninouv
    egen btime=group(an)
    save temp`tag'.dta, replace
}

/* Merge all balanced samples */
use tempA.dta, clear
append using tempB.dta
append using tempC.dta
append using tempD.dta
gen balanced=1
save temp3.dta, replace

/* KEEP THOSE UNOBSERVED IN T+1 AND IMPUTE sn=0, dp=0 IN T+1.
   Panel A: 1976-1982, B: 1982-1991, C: 1991-1999, D: 1999-2007.
   For each panel keep the first-year observation of individuals with
   ftreat==1 who have no observation in the next period, and create their
   second-period row as a copy of the first with an set to the end year
   and sn, dp set to 0. tempA-tempD.dta are overwritten. The four panels
   were previously four copy-pasted stanzas; they are now one loop. */
local starts 1976 1982 1991 1999
local stops  1982 1991 1999 2007
local tags   A B C D
forvalues p=1/4 {
    local y1  : word `p' of `starts'
    local y2  : word `p' of `stops'
    local tag : word `p' of `tags'

    use temp2.dta, clear
    /* temp2.dta was saved after a joinby; put the data back in panel
       order (xtset indiv time) before using F. */
    sort indiv time
    keep if an==`y1' | an==`y2'
    keep if (ftreat==1 & an==`y1' & missing(F.an))
    save first.dta, replace
    /* turn the rows in memory into the imputed second-period rows.
       NOTE(review): only an, sn and dp are overwritten; age and time keep
       their first-period values in the imputed rows - confirm this is
       intended, since later steps filter on age in period 2 */
    replace an=`y2'
    replace sn=0
    replace dp=0
    append using first.dta
    gen nind="`tag'"+nninouv
    egen btime=group(an)
    save temp`tag'.dta, replace
}

/* Merge all unbalanced samples */
use tempA.dta, clear
append using tempB.dta
append using tempC.dta
append using tempD.dta
gen balanced=0
save temp4.dta, replace

/* balanced + unbalanced samples together, back into temp3.dta */
use temp3.dta, clear
append using temp4.dta
tab balanced
save temp3.dta, replace

/* Original location: the ze of every row that is not a second-period row,
   stored per panel-individual (nind) as oze */
keep if btime!=2
keep nind ze
rename ze oze
save oze.dta, replace

/* Attach the original location to both periods. After the renames,
   aze is the ze recorded on the row itself and ze is the original one. */
use temp3.dta, clear
drop _merge
joinby nind using oze.dta, unm(m)
rename ze aze
rename oze ze
drop _merge

/**********************************************************/
/* Merge with immigration shocks in the original location */
/**********************************************************/
joinby ze an using "C:\Users\Public\Documents\ImmPanelRevis19\ImmPanelRevis19\Donnees\pit\instru2017.dta", unm(m)
tab _merge
drop _merge

/*********************/
/* change in outcome */
/*********************/
/* panel unit = panel-specific individual (nind), time = period 1/2 */
egen indiv2=group(nind)
xtset indiv2 btime
sort indiv2 btime

/**************/
/* occupation */
/**************/
/* Changes in task intensity */
gen dzbasman=D.zbasman 
gen dzbascom=D.zbascom 

/* fixed effects: Y_* dummies and interactions created by xi */
xi, pre(Y_) i.an*lsconst i.an*lstrad i.an*lsntrad i.an*lzepop

/* location shifter: the row's own ze differs from the original ze */
gen lshift=(ze!=aze)
/* NOTE(review): tempAZE.dta is not created anywhere in this file - it
   must already exist in the working directory */
joinby aze an using tempAZE.dta, unm(m)
/* joinby does not promise to keep the panel order that the D./L./F.
   operators rely on, so restore it explicitly */
sort indiv2 btime
/* change in share of immigrants in the commuting zone 
gen dzeimmig=azeimmig-zeimmig */
/* change in average wage in the commuting zone */
gen damlsnd=D.amlsnd
/* change in total population in the commuting zone */
gen dazepop=ln(azepop)-ln(L.azepop)

/* housing costs */
drop _merge
joinby aze using "C:\Users\Public\Documents\ImmPanelRevis19\ImmPanelRevis19\Donnees\Loyer\cloyer2.dta", unm(m)
drop _merge
joinby aze an using "C:\Users\Public\Documents\ImmPanelRevis19\ImmPanelRevis19\Donnees\Loyer\ipc_Local.dta", unm(m)
drop _merge
/* restore panel order again after the two joins */
sort indiv2 btime

/* wage adjusted for housing costs */
gen lsnd6=ln((sn/dp)*(100/ipcl6))
gen lsnd20=ln((sn/dp)*(100/ipcl20))
gen lsnd30=ln((sn/dp)*(100/ipcl30))

gen hcost=cloyer2-L.cloyer2
gen lhcost=ln(cloyer2)-ln(L.cloyer2)
gen liloy=ln(iloy)-ln(L.iloy)

/* flag the small CZ to be eliminated */
gen elim=(ze=="9306" | ze=="9109" | ze=="9110" | ze=="9116" | ///
ze=="8307" | ze=="7206" | ze=="5214" | ze=="4101" | ze=="4111" | ze=="8306")
tab elim

/* eliminate small CZ (same ten codes as the flag, previously listed twice) */
drop if elim

/* drop age outliers in the second period.
   NOTE(review): where this file imputes second-period rows, they are
   copies of the first-period row with only an, sn and dp overwritten, so
   their age is the first-period age when this filter runs - confirm
   this is intended */
drop if btime==2 & age<32
drop if btime==2 & age>59

/* drop first-period rows left without a second-period row */
drop if btime==1 & missing(F.age)

/* fixed effects age x an */
xi i.age*i.an , pre(F_)

/* merge technicians & clerks */
gen tecemp2=(tec==1 | emp2==1)

save finalmw_imput.dta, replace

/* Split the imputed file by homme: homme==1 -> final_imput.dta,
   homme==0 -> finalw_imput.dta */
use finalmw_imput.dta, clear
preserve
keep if homme==1
save final_imput.dta, replace
restore
keep if homme==0
save finalw_imput.dta, replace

/* "rdx" versions: join nydads.dta and drop individuals flagged nydads==1.
   NOTE(review): final.dta is not created in this file - it must already
   exist in the working directory */
foreach stem in final final_imput {
    use `stem'.dta, clear
    joinby nninouv using "C:\Users\Public\Documents\ImmPanelRevis19\ImmPanelRevis19\Donnees\nydads.dta", unm(m)
    drop if nydads==1
    drop _merge
    save `stem'rdx.dta, replace
}

/* Median wages instead of average wages */
/*********************/
/* WAGES WAGES WAGES */
/*********************/
capture program drop wagesMD
program define wagesMD
/* wagesMD, occp(name)

   IV regression of the cell MEDIAN change in residual log wages on the
   immigration shock for one occupation group, using finalrdx.dta.

   occp : name of a 0/1 indicator variable in finalrdx.dta selecting the
          occupation group.

   Side effects: overwrites temp.dta, iw2.dta and temp1.dta in the working
   directory and stores the estimation result md<occp>iv.
   Requires the user-written command ivreg2 and data that are already xtset
   (the L. and D. operators are used directly after loading). */
version 11
syntax, occp(name) 

/********************/
/* Balanced Sample  */
/********************/
use finalrdx.dta, clear

/* keep period-1 observations in the group and period-2 observations of
   individuals who were in the group one period earlier */
keep if (btime==1 & `occp') | (btime==2 & L.`occp')

/* log of sn/dp */
gen lsnd=ln(sn/dp)
/* residual wages: residuals from a regression on the F_* dummies */
quietly: reg lsnd F_* 
predict rlsnd, residuals
/* change in residual wages */
gen drlsnd=D.rlsnd

/* ldp and oshift are not used by the regression below; they are only
   carried into temp.dta / temp1.dta */
gen ldp=ln(dp/l.dp)
gen oshift=(`occp'==0 & L.`occp'==1)
keep if btime==2 & L.`occp' & !missing(dimm) & !missing(dimm75)

/* trim (not winsorize): within each year, values of drlsnd outside the
   1st-99th percentile range are set to missing and dropped below.
   A year with no observations is skipped; previously the empty r(p99)
   macro turned the replace into a syntax error. */
tempname lo hi
foreach year in 1982 1991 1999 2007 {
    quietly: summarize drlsnd if an==`year', detail
    if r(N)>0 {
        scalar `lo'=r(p1)
        scalar `hi'=r(p99)
        replace drlsnd=. if an==`year' & (drlsnd>`hi' | drlsnd<`lo')
    }
}
drop if missing(drlsnd)
drop if missing(basman)
drop if missing(bascom)

/* weight = inverse of the number of individuals in the (ze, an) cell.
   iw2 is merged back and saved in temp1.dta; it is not used as a weight
   in the regression of this program. */
save temp.dta, replace
gen dummy=1
collapse (sum) nbze = dummy , by(ze an)
gen iw2=1/ nbze
drop nbze

save iw2.dta, replace
/* clear is the documented option of use */
use temp.dta, clear
capture drop _merge
joinby ze an using iw2.dta , unm(m)
drop _merge

save temp1.dta, replace
/* one observation per (ze, an): cell means, plus the cell median of
   drlsnd stored as mdrlsnd */
collapse (mean) drlsnd dimm dimm75 Y_* (median) mdrlsnd=drlsnd, by(ze an)

/* IV: median outcome, dimm instrumented by dimm75 */
ivreg2 mdrlsnd (dimm  = dimm75 ) Y_* , partial(Y_* ) cl(ze)
estimates store md`occp'iv

end

/* Run the median-wage regressions once per occupation group */
foreach grp in all cad tecemp2 bcq bcnq {
    wagesMD, occp(`grp')
}

/* Show the five IV results in the log */
estout mdalliv mdcadiv mdtecemp2iv mdbcqiv mdbcnqiv, ///
    cells(b(star fmt(%9.3f)) se(par(`"="("'`")""'))) ///
    stats(N r2 widstat) starlevels(* 0.10 ** 0.05 *** 0.01)

/* median no imputations: append the same results to the RTF table */
esttab mdalliv mdcadiv mdtecemp2iv mdbcqiv mdbcnqiv ///
    using "C:\Users\Public\Documents\Tab21\Table12_Median.rtf", ///
    append title("median") b(%9.3f) cells(b(star fmt(3)) se(fmt(3) par) ) ///
    stats(N widstat) star(* 0.10 ** 0.05 *** 0.01)
 
/* Median wages with zero imputed */
capture program drop Imputwagesmedian
program define Imputwagesmedian
/* Imputwagesmedian, occp(name)

   IV regression of the change in the cell median log wage on the
   immigration shock for one occupation group, using final_imputrdx.dta,
   in which individuals unobserved in the second period carry sn=0, dp=0.

   occp : name of a 0/1 indicator variable in final_imputrdx.dta selecting
          the occupation group.

   Stores the estimation result under the name <occp> (this overwrites
   any stored result of the same name). Requires the user-written
   command ivreg2. */
version 11
syntax, occp(name) 

/********************/
/* Balanced Sample  */
/********************/
use final_imputrdx.dta, clear
/* the file was last saved after a joinby; restore the panel order
   (xtset indiv2 btime) that the L. and D. operators below rely on */
sort indiv2 btime

/* keep period-1 observations in the group and period-2 observations of
   individuals who were in the group one period earlier */
keep if (btime==1 & `occp') | (btime==2 & L.`occp')

/* log of sn/dp; rows with dp==0 (imputed) get a log wage of 0 */
gen lsnd=ln(sn/dp)
replace lsnd=0 if dp==0
/* residual wages: residuals from a regression on the F_* dummies */
quietly: reg lsnd F_* 
predict rlsnd, residuals
/* change in residual wages */
gen drlsnd=D.rlsnd
/* if dp==0 then drlsnd=0 */
replace drlsnd=0 if dp==0 

/* one observation per (ze, an, btime): cell means, plus cell medians of
   drlsnd (mdrlsnd) and lsnd (mlsnd) */
collapse (mean) drlsnd dimm dimm75 dimtm2 ldimm ldimtm2 Y_*   (median) mdrlsnd=drlsnd mlsnd=lsnd, by(ze an btime)

/* wave identifier: pairs the first and second year of each panel */
gen vague="v1" if (an==1976 & btime==1) | (an==1982 & btime==2)
replace vague="v2" if (an==1982 & btime==1) | (an==1991 & btime==2)
replace vague="v3" if (an==1991 & btime==1) | (an==1999 & btime==2)
replace vague="v4" if (an==1999 & btime==1) | (an==2007 & btime==2)
/* panel unit = wave x ze, time = period 1/2 */
gen vze=vague+"X"+ze
egen ize=group(vze)
xtset ize btime

/* cells whose median log wage is 0 are dropped before differencing */
drop if mlsnd==0
gen dmlsnd=D.mlsnd
/* IV: dimm instrumented by dimm75 */
ivreg2 dmlsnd (dimm  = dimm75 ) Y_* , partial(Y_* ) cl(ze)
estimates store `occp'

end

/* Run the zero-imputed median regressions once per occupation group;
   each result is stored under the group's own name */
foreach grp in all cad2 tecemp2 bcq bcnq {
    Imputwagesmedian, occp(`grp')
}

/* Show the five results in the log */
estout all cad2 tecemp2 bcq bcnq, ///
    cells(b(star fmt(%9.3f)) se(par(`"="("'`")""'))) ///
    stats(N r2 widstat) starlevels(* 0.10 ** 0.05 *** 0.01)

/* median zero imputed: append the same results to the RTF table */
esttab all cad2 tecemp2 bcq bcnq ///
    using "C:\Users\Public\Documents\Tab21\Table12_Median.rtf", ///
    append title("median zero") b(%9.3f) cells(b(star fmt(3)) se(fmt(3) par) ) ///
    stats(N widstat) star(* 0.10 ** 0.05 *** 0.01)

/*****************************************/
/* Median wages imputed last observation */
/*****************************************/
/* Load only the identifier, wage, days worked and year */
use nninouv sn dp an using "C:\Users\Public\Documents\ImmPanelRevis19\ImmPanelRevis19\Donnees\indiv_dispm.dta", clear
/* snl / dpl: wage and number of days worked of the LAST observation
   within a cell (the selection itself is done by lastobs) */
rename sn snl 
rename dp dpl
save temp.dta, replace

capture program drop lastobs
program define lastobs
/* lastobs, y1(integer) y2(integer)

   From temp.dta (expected to hold nninouv, an, snl, dpl), keep for each
   individual the latest observation with y1 <= an <= y2, relabel it as
   year y2 / period 2, and save the result as b<y2>.dta.

   y1 : first year of the window (inclusive)
   y2 : last year of the window (inclusive); also used in the file name */
version 11
syntax, y1(integer) y2(integer)
use temp.dta, clear
keep if an>=`y1' & an<=`y2'
/* order by year inside each individual in the by-prefix itself, so that
   _n==_N is the latest year by construction rather than relying on the
   order left by a preceding sort */
bysort nninouv (an): keep if _n==_N
replace an=`y2'
gen btime=2
save b`y2'.dta, replace
end

/* One last-observation file per panel window */
local starts 1976 1982 1991 1999
local stops  1982 1991 1999 2007
forvalues p=1/4 {
    local from : word `p' of `starts'
    local to   : word `p' of `stops'
    lastobs , y1(`from') y2(`to')
}

/* Stack the four files into lastobs.dta */
use b1982.dta, clear
foreach yr in 1991 1999 2007 {
    append using b`yr'.dta
}
save lastobs.dta, replace

/* the per-window files are no longer needed */
foreach yr in 1982 1991 1999 2007 {
    erase b`yr'.dta
}

/* Median wages with last obs imputed */
capture program drop ImputwagesmedianLO
program define ImputwagesmedianLO
/* ImputwagesmedianLO, occp(name)

   Same as the zero-imputed median regression, except that second-period
   rows with sn==0 / dp==0 first receive the wage and days of the last
   observation found in lastobs.dta (snl, dpl).

   occp : name of a 0/1 indicator variable in final_imputrdx.dta selecting
          the occupation group.

   Stores the estimation result under the name <occp> (this overwrites
   any stored result of the same name). Requires lastobs.dta in the
   working directory and the user-written command ivreg2. */
version 11
syntax, occp(name) 

/********************/
/* Balanced Sample  */
/********************/
use final_imputrdx.dta, clear
/* the file was last saved after a joinby; restore the panel order
   (xtset indiv2 btime) that the L. and D. operators below rely on */
sort indiv2 btime

/* keep period-1 observations in the group and period-2 observations of
   individuals who were in the group one period earlier */
keep if (btime==1 & `occp') | (btime==2 & L.`occp')
joinby nninouv an btime using lastobs.dta, unm(m)
/* joinby does not promise to keep the panel order; D.rlsnd below needs
   it, so restore it explicitly */
sort indiv2 btime

/* replace imputed zeros by the last observed values.
   NOTE(review): a row with no match in lastobs.dta has missing snl/dpl,
   so its sn/dp become missing rather than staying 0 - confirm intended */
replace sn=snl if btime==2 & sn==0
replace dp=dpl if btime==2 & dp==0

/* log of sn/dp; rows that still have dp==0 get a log wage of 0 */
gen lsnd=ln(sn/dp)
replace lsnd=0 if dp==0
/* residual wages: residuals from a regression on the F_* dummies */
quietly: reg lsnd F_* 
predict rlsnd, residuals
/* change in residual wages */
gen drlsnd=D.rlsnd
/* if dp==0 then drlsnd=0 */
replace drlsnd=0 if dp==0 

/* one observation per (ze, an, btime): cell means, plus cell medians of
   drlsnd (mdrlsnd) and lsnd (mlsnd) */
collapse (mean) drlsnd dimm dimm75 dimtm2 ldimm ldimtm2 Y_*   (median) mdrlsnd=drlsnd mlsnd=lsnd, by(ze an btime)

/* wave identifier: pairs the first and second year of each panel */
gen vague="v1" if (an==1976 & btime==1) | (an==1982 & btime==2)
replace vague="v2" if (an==1982 & btime==1) | (an==1991 & btime==2)
replace vague="v3" if (an==1991 & btime==1) | (an==1999 & btime==2)
replace vague="v4" if (an==1999 & btime==1) | (an==2007 & btime==2)
/* panel unit = wave x ze, time = period 1/2 */
gen vze=vague+"X"+ze
egen ize=group(vze)
xtset ize btime

/* cells whose median log wage is 0 are dropped before differencing */
drop if mlsnd==0
gen dmlsnd=D.mlsnd
/* IV: dimm instrumented by dimm75 */
ivreg2 dmlsnd (dimm  = dimm75 ) Y_* , partial(Y_* ) cl(ze)
estimates store `occp'

end

/* Run the last-observation-imputed median regressions once per
   occupation group; each result is stored under the group's own name */
foreach grp in all cad2 tecemp2 bcq bcnq {
    ImputwagesmedianLO, occp(`grp')
}

/* Show the five results in the log */
estout all cad2 tecemp2 bcq bcnq, ///
    cells(b(star fmt(%9.3f)) se(par(`"="("'`")""'))) ///
    stats(N r2 widstat) starlevels(* 0.10 ** 0.05 *** 0.01)

/* median last observed wage imputed: append to the RTF table */
esttab all cad2 tecemp2 bcq bcnq ///
    using "C:\Users\Public\Documents\Tab21\Table12_Median.rtf", ///
    append title("median last") b(%9.3f) cells(b(star fmt(3)) se(fmt(3) par) ) ///
    stats(N widstat) star(* 0.10 ** 0.05 *** 0.01)
